#include "Genome_Index.h"

// Default constructor: delegates to initialization(), which resets the
// bucket count, the genome pointers, the hash/seed-key function pointers,
// the reference name and the digit counters.
CGenome_Index::CGenome_Index(void)
{
    initialization();
}

// Destructor: releases the two genome objects with plain `delete`.
// Both pointers start out as NULL (see initialization()), and deleting a
// null pointer is a no-op, so this is safe even if nothing was ever loaded.
// NOTE(review): assumes this index is the sole owner of both objects - confirm.
CGenome_Index::~CGenome_Index(void)
{
    delete pgenomeNT;
    delete pgenomeNTInBits;
}

int CGenome_Index::initialization(void)
{
    this->NO_OF_BUCKET = 0x4000000; // 64M buckets, 256 MB  4^13
    // this->NO_OF_BUCKET = 0x1000000; // 16M buckets,  64 MB, 4^12
    // this->NO_OF_BUCKET = 0x400000;  //  4M buckets,  16 MB, 4^11

    this->pgenomeNT = NULL;
    this->pgenomeNTInBits = NULL;
    this->fpHashValue = NULL;
    this->fpSeedKey = NULL;

    this->caRefName[0] = '\0';

    this->bEXTEND_SEED = true;
    this->iHashDigits = 0;
    this->iKeyDigits = 0;
    return(0);
}

unsigned int CGenome_Index::getHashValue(char* slide_window) const
{
    CReadInBits r(slide_window);
    return(this->getHashValue(r) & (NO_OF_BUCKET - 1));
}

// generate Key for sorting
unsigned int CGenome_Index::getSeedKey(char* slide_window) const
{
    CReadInBits r(slide_window);
    return(getSeedKey(r));
}

// Bucket id for an encoded read.
// Returns 0 when no hash function has been chosen yet; otherwise the value
// of the selected hash function masked with (NO_OF_BUCKET - 1).
unsigned int CGenome_Index::getHashValue(CReadInBits r) const
{
    if (fpHashValue == NULL) {
        return 0;
    }
    return fpHashValue(r) & (NO_OF_BUCKET - 1); // DEBUG
}

// Sorting key for an encoded read.
// Returns 0 when no seed-key function has been chosen yet; otherwise calls
// the selected function with the read and the current number of key digits.
unsigned int CGenome_Index::getSeedKey(CReadInBits r) const
{
    if (fpSeedKey == NULL) {
        return 0;
    }
    return fpSeedKey(r, iKeyDigits);
}

// Select the seed pattern, the bucket-hash function and (through
// chooseSeedKeyFunction) the seed-key function for a read length and a
// sensitivity option.
//
// Parameters:
//   uiReadLength - length of the reads that will be indexed / mapped.
//   chosenSeedId - sensitivity option; 0, 1, 2, 3, 4, 11, 12 and 20 have a
//                  dedicated seed. Any other value falls back to the F3 seed.
//
// Side effects: sets fpHashValue, uiNoOfShift, iHashDigits, iKeyDigits,
// uiSeedLength, fpSeedKey and may clear bEXTEND_SEED.
//
// Returns -1 when no hash function is available for the read length;
// otherwise the result of chooseSeedKeyFunction() (0 on success, -1 on error).
int CGenome_Index::chooseHashFunction(unsigned int uiReadLength, int chosenSeedId)
{
    const char* F0SeedRepeat  = "111";
    const char* F1SeedRepeat  = "1110";
    const char* F2SeedRepeat  = "1110100";
    const char* S11SeedRepeat = "1111001000";
    const char* F3SeedRepeat  = "11101001000";
    const char* S20SeedRepeat = "11110010000";
    const char* S12SeedRepeat = "11110010000000"; // Only for read length 44 - 50
    const char* F4SeedRepeat  = "1100010000";

    const char* SeedRepeatPattern;
    // (1) Choose SeedRepeatPattern and the hash function that maps to a bucket ID
    switch (chosenSeedId) {
    case 0:
        SeedRepeatPattern = F0SeedRepeat;
        this->fpHashValue = selectF0(uiReadLength);
        break;
    case 1:
        SeedRepeatPattern = F1SeedRepeat;
        this->fpHashValue = selectF1(uiReadLength);
        break;
    case 2:
        SeedRepeatPattern = F2SeedRepeat;
        this->fpHashValue = selectF2(uiReadLength);
        break;
    case 11: // fully sensitive to three mismatches, two of which must be consecutive
        SeedRepeatPattern = S11SeedRepeat;
        this->fpHashValue = selectS1_1(uiReadLength);
        break;
    case 20: // fully sensitive to 2 pairs of consecutive mismatches
        SeedRepeatPattern = S20SeedRepeat;
        this->fpHashValue = selectS2_0(uiReadLength);
        break;
    case 3:
        SeedRepeatPattern = F3SeedRepeat;
        this->fpHashValue = selectF3(uiReadLength);
        break;
    case 12: // fully sensitive to four mismatches, two of which must be consecutive
        if (uiReadLength >= 44) {
            SeedRepeatPattern = S12SeedRepeat;
            this->fpHashValue = getS1_2SeedHashValue;
            break;
        }
        // Intentional fall through: reads shorter than 44 use the F4 seed.
    case 4:
        SeedRepeatPattern = F4SeedRepeat;
        this->fpHashValue = selectF4(uiReadLength);
        this->fpSeedKey = &returnDummyHashKey; // DEFAULT
        break;
    default:
        SeedRepeatPattern = F3SeedRepeat; // DEFAULT
        // The hash function must match the fallback pattern. Without this
        // assignment fpHashValue kept whatever an earlier call left behind
        // (or NULL on a fresh object), contradicting the message below.
        this->fpHashValue = selectF3(uiReadLength);
        LOG_INFO("\nInfo %d: The sensitivity threshold haven't been implemented.\n",
                 INFO_LOG);
        LOG_INFO("\nInfo %d: Use seed pattern which is full sensitivit to 3 mismatches instead.\n",
                 INFO_LOG);
    }
    if (this->fpHashValue == NULL) {
        string seedStr = seedSymbol(chosenSeedId);
        LOG_INFO("Info %d: Read length is too short (or long) for the seed %s.\n",
                 ERROR_LOG, seedStr.c_str());
        return(-1);
    }

    // (2) Split the cared positions into hash digits and key digits
    //     (the key is used for binary search).
    int DEFAULT_HASHING_BITS = 13;
    this->uiNoOfShift = static_cast<unsigned int>(strlen(SeedRepeatPattern)) - 1;

    if (bEXTEND_SEED) {
        this->iHashDigits = getNoOfCaredPositions4FullRead(SeedRepeatPattern, uiReadLength);
        // Special setting to extend 34-bp and 32-bp reads because the min weight is twelve only
        if (uiReadLength == 34 &&
                (static_cast<unsigned int>(chosenSeedId) == FULL_SENSITIVE_OPT_TO_TWO_BASE_MIS || chosenSeedId == 3)) {
            DEFAULT_HASHING_BITS = 12;
        } else if (uiReadLength == 32 && chosenSeedId == 3) {
            DEFAULT_HASHING_BITS = 10;
        }
        const int seedWeight = getNoOfCaredPositions(SeedRepeatPattern, uiReadLength);
        if (DEFAULT_HASHING_BITS > seedWeight) {
            string msg("The mapping could be slow.\n");
            LOG_INFO("Info %d: Seed weight %d is low due to the short read length.%s",
                     INFO_LOG, seedWeight, msg.c_str());
            // Seed is too light to extend: reuse the weight computed above.
            this->iHashDigits = seedWeight;
            this->bEXTEND_SEED = false;
        }
    } else {
        this->iHashDigits = getNoOfCaredPositions(SeedRepeatPattern, uiReadLength);
    }

    // Digits beyond DEFAULT_HASHING_BITS go to the key instead of the hash.
    if (this->iHashDigits > DEFAULT_HASHING_BITS) {
        this->iKeyDigits = this->iHashDigits - DEFAULT_HASHING_BITS;
        this->iHashDigits = DEFAULT_HASHING_BITS;
    } else {
        this->iKeyDigits = 0;
    }
    // When hashing index, use this->uiSeedLength to filter out sliding windows with N
    // NOTE(review): unsigned subtraction - presumably the select* functions
    // return NULL for reads shorter than the pattern; confirm.
    this->uiSeedLength = uiReadLength - this->uiNoOfShift;
    // If extended seed method is used, this->uiSeedLength will be changed to read length.

    // (3) Choose the seed key function
    return(this->chooseSeedKeyFunction(uiReadLength, chosenSeedId));
}

int CGenome_Index::chooseSeedKeyFunction(unsigned int uiReadLength, int chosenSeedId)
{

    switch (chosenSeedId) {
    case 0:
        if (iKeyDigits > 0) {
            this->fpSeedKey = &getF0SeedKey;
        } else {
            this->fpSeedKey = &returnDummyHashKey;
        }
        break;
    case 1:
        if (iKeyDigits > 0) {
            this->fpSeedKey = &getF1SeedKey;
        } else {
            this->fpSeedKey = &returnDummyHashKey;
        }
        break;
    case 2: // Full sensitivie to 2 mis
        if (iKeyDigits > 0) {
            this->fpSeedKey = &getF2SeedKey;
        } else {
            this->fpSeedKey = &returnDummyHashKey;
        }
        break;
    case 11: // Full sensitivie to 1 color + 1 base mis
        if (iKeyDigits > 0) {
            this->fpSeedKey = &getS1_1SeedKey;
        } else {
            this->fpSeedKey = &returnDummyHashKey;
        }
        break;
    case 20: // full sensitive to 2 pairs of consecutive mis.
        if (iKeyDigits > 0) {
            if (uiReadLength == 34) {
                this->fpSeedKey = &getS2_0SeedKey4ReadLength34;
            } else
                this->fpSeedKey = &getS2_0SeedKey;
        } else {
            this->fpSeedKey = &returnDummyHashKey;
        }
        break;
    case 3: // Full sensitivie to 3 mis
        if (iKeyDigits > 0) {
            if (uiReadLength == 34) {
                this->fpSeedKey = &getF3SeedKey4ReadLength34;
            } else if (uiReadLength == 32) {
                this->fpSeedKey = &getF3SeedKey4ReadLength32;
            } else
                this->fpSeedKey = &getF3SeedKey;
        } else {
            this->fpSeedKey = &returnDummyHashKey; // DEBUG
        }
        break;
    case 12: // Full sensitive to 1 color + 2 mis
        if (iKeyDigits > 0) {
            if (46 <= uiReadLength && uiReadLength <= 49) {
                this->fpSeedKey = &getS1_2SeedKey4ReadLength46_49;
            } else {
                this->fpSeedKey = &returnDummyHashKey; // DEFAULT
            }
            break;
        } // otherwise use F4
    case 4:
        this->fpSeedKey = &returnDummyHashKey; // DEFAULT
        break;
    default:
        this->fpSeedKey = &returnDummyHashKey; // DEFAULT
        if (chosenSeedId > (int)MAX_MISMATCH_THRESHOLD) {
            LOG_INFO("Info %d: No Seed key function is defined for\
                full sensitive opt %d!\n", WARNING_LOG, chosenSeedId);
            return(-1);
        }
    }
    return (0);
}

